#!/bin/bash 
# Generic NiCad3 cross-system extract and find clones script
# J.R. Cordy, Queen's University, January 2011

# Print the command-line synopsis, followed by one blank line, on stdout.
usage() {
    printf '%s\n' \
        "Usage:  NiCad3Cross granularity language systemdir system2dir [ config ]" \
        "          where granularity is one of:  { functions blocks ... }" \
        "          and   language    is one of:  { c java cs py ... }" \
        "          and   config      is one of:  { blindrename ... }" \
        ""
}

echo ""
echo "NiCad3 Cross-Clone Detector v3.5 (19 Mar 2013)"
echo ""

# Check that a usable TXL is on the PATH: the output of "txl -V" must
# mention release 10.5 or 10.6.
# The pattern is quoted so the shell cannot expand it as a file name glob
# against the current directory, and the dot is escaped so that it only
# matches a literal period.
# NOTE(review): the message below says "10.5i or later", yet releases after
# 10.6 do not match this pattern - confirm which versions are meant.
txlversion=$(txl -V 2>&1 | grep '10\.[56]')
if [ -z "${txlversion}" ]
then
    echo "*** Error:  NiCad3 requires FreeTXL 10.5i or later"
    echo ""
    exit 99
fi

# Locate the NiCad3 installation.  When the first argument is an existing
# directory it is taken as the installation directory and consumed;
# otherwise the current directory is used.
if [ -d "$1" ]
then
    lib="$1"
    shift
else
    lib=.
fi

# The installation must contain a tools directory.  ${lib} is quoted so that
# a path containing blanks is tested as one word instead of making the test
# itself fail (which would let the script carry on regardless).
if [ ! -d "${lib}/tools" ]
then
    echo "*** Error:  Cannot find NiCad3 installation ${lib}"
    echo ""
    exit 99
fi

# The tools must have been compiled: crossclones.x has to be executable.
if [ ! -x "${lib}/tools/crossclones.x" ]
then
    echo "*** Error:  Missing ${lib}/tools/crossclones.x - type 'make' to make the NiCad3 tools"
    echo ""
    exit 99
fi

# Granularity is the next positional argument; without it, print the usage
# text and give up.
if [ -z "$1" ]
then
    usage
    exit 99
fi
granularity=$1
shift

# The language argument follows and is mandatory as well.
if [ -z "$1" ]
then
    usage
    exit 99
fi
language=$1
shift

# The next argument must name the first system's existing source directory.
if [ -d "$1" ]
then
    system1=$1
    shift
else
    usage
    exit 99
fi

# The argument after it must name the second system's existing source
# directory.  As for the first one, a missing directory ends the script here
# instead of continuing with system2 unset.
if [ -d "$1" ]
then
    system2=$1
    shift
else
    usage
    exit 99
fi

# Select the configuration file: the optional last argument names a file
# under ${lib}/config (without its .cfg suffix); default.cfg is used when
# no name is given.
if [ "$1" = "" ]
then
    config="${lib}/config/default.cfg"
else
    config="${lib}/config/$1.cfg"
fi

# The configuration file must exist and be non-empty.
if [ ! -s "${config}" ]
then
    usage
    exit 99
fi

echo "config=${config}"

# Read the NiCad3 configuration parameters into this shell.  The path is
# quoted so that one containing blanks or glob characters is still read as
# a single file name.
source "${config}"

# Report the settings in effect, one name=value pair per line, and finish
# with a blank line.
printf '%s\n' \
    "system1=${system1}" \
    "system2=${system2}" \
    "threshold=${threshold}" \
    "granularity=${granularity}" \
    "language=${language}" \
    "transform=${transform}" \
    "rename=${rename}" \
    "filter=${filter}" \
    "abstract=${abstract}" \
    "normalize=${normalize}" \
    "cluster=${cluster}" \
    "report=${report}" \
    ""

# Both system source directories must exist before anything is extracted.
[ -d "${system1}" ] || {
    echo "*** ERROR: Can't find system source directory ${system1}"
    exit 99
}

# The second system is the one the first is compared against.
[ -d "${system2}" ] || {
    echo "*** ERROR: Can't find system source directory ${system2}"
    exit 99
}

# Extract the potential clones of the first system, or reuse an earlier
# extraction when its XML file already exists and is non-empty.
date
datestamp=$(date +%F-%T)
echo ""

# Log file for the first system in this run.
log1="${system1}_${granularity}-crossclones-${datestamp}.log"

if [ -s "${system1}_${granularity}.xml" ]
then
    echo "Using previously extracted ${granularity} from ${language} files in ${system1}"
    # Nothing is run in this case; the log starts out holding one blank line.
    echo > "${log1}" 2>&1
    result=0
else
    echo "Extracting ${granularity} from ${language} files in ${system1}"
    /usr/bin/time --output=/tmp/time$$ "${lib}/scripts/Extract" "${granularity}" "${language}" "${system1}" > "${log1}" 2>&1
    # Record the status immediately: after the echo and cat below, $? would
    # describe those commands instead of the extraction.
    result=$?
    echo ""
    /bin/cat /tmp/time$$
fi

echo ""

# Statuses of 99 and above are treated as a failed extraction.
if [ $result -ge 99 ]
then
    echo "*** ERROR: Extraction failed, code $result"
    echo "Detailed log in ${log1}"
    echo ""
    exit 99
fi

# Check for parsing problems: every TXL0191E/TXL0192E line in the log is
# reported as one source file that failed to parse.
syntaxerrors=$(grep "TXL019[12]E" "${log1}" | wc -l)
if [ ${syntaxerrors} != 0 ]
then
    if [ ${syntaxerrors} = 1 ]
    then
        echo "*** Warning: 1 source file failed to parse"
    else
        echo "*** Warning: ${syntaxerrors} source files failed to parse"
    fi
    echo ""
fi

# Each potential clone is one line starting with "<source " in the XML file.
npcs=$(grep "^<source " "${system1}_${granularity}.xml" | wc -l)
echo "Extracted ${npcs} ${granularity}"
echo ""

# Base name of the first system's potential-clones file; later stages append
# a suffix to it for every processing step they apply.
pc1file=${system1}_${granularity}


# Extract the potential clones of the second system, or reuse an earlier
# extraction when its XML file already exists and is non-empty.
date
# Keep the datestamp already chosen for this run when there is one.  Taking
# a fresh one here would make the later stages append the first system's
# output to a log with a different name than the one its extraction wrote.
datestamp=${datestamp:-$(date +%F-%T)}
echo ""

# Log file for the second system in this run.
log2="${system2}_${granularity}-crossclones-${datestamp}.log"

if [ -s "${system2}_${granularity}.xml" ]
then
    echo "Using previously extracted ${granularity} from ${language} files in ${system2}"
    # Nothing is run in this case; the log starts out holding one blank line.
    echo > "${log2}" 2>&1
    result=0
else
    echo "Extracting ${granularity} from ${language} files in ${system2}"
    /usr/bin/time --output=/tmp/time$$ "${lib}/scripts/Extract" "${granularity}" "${language}" "${system2}" > "${log2}" 2>&1
    # Record the status immediately: after the echo and cat below, $? would
    # describe those commands instead of the extraction.
    result=$?
    echo ""
    /bin/cat /tmp/time$$
fi

echo ""

# Statuses of 99 and above are treated as a failed extraction.
if [ $result -ge 99 ]
then
    echo "*** ERROR: Extraction failed, code $result"
    echo "Detailed log in ${log2}"
    echo ""
    exit 99
fi

# Check for parsing problems: every TXL0191E/TXL0192E line in the log is
# reported as one source file that failed to parse.
syntaxerrors=$(grep "TXL019[12]E" "${log2}" | wc -l)
if [ ${syntaxerrors} != 0 ]
then
    if [ ${syntaxerrors} = 1 ]
    then
        echo "*** Warning: 1 source file failed to parse"
    else
        echo "*** Warning: ${syntaxerrors} source files failed to parse"
    fi
    echo ""
fi

# Each potential clone is one line starting with "<source " in the XML file.
npcs=$(grep "^<source " "${system2}_${granularity}.xml" | wc -l)
echo "Extracted ${npcs} ${granularity}"
echo ""

# Base name of the second system's potential-clones file; later stages
# append a suffix to it for every processing step they apply.
pc2file=${system2}_${granularity}


# Apply the configured transformation to both systems, unless the
# configuration sets transform to "none".  A non-empty result file from an
# earlier run is reused instead of being recomputed.
if [ "${transform}" != none ]
then
    log1="${system1}_${granularity}-crossclones-${datestamp}.log"
    log2="${system2}_${granularity}-crossclones-${datestamp}.log"

    # First system
    if [ -s "${pc1file}-${transform}.xml" ]
    then
        echo "Using previously ${transform} transformd extracted ${granularity} from ${language} files in ${system1}"
        result=0
    else
        echo "Applying ${transform} transformation to extracted ${granularity} from ${language} files in ${system1}"
        # ${transform} stays unquoted so that a value holding several words
        # still reaches Transform as separate arguments.
        /usr/bin/time --output=/tmp/time$$ "${lib}/scripts/Transform" "${granularity}" "${language}" "${pc1file}.xml" ${transform} >> "${log1}" 2>&1
        # Record the status before echo and cat overwrite $?.
        result=$?
        echo ""
        /bin/cat /tmp/time$$
    fi

    echo ""

    if [ $result != 0 ]
    then
        echo "*** ERROR: Transformation failed, code $result"
        echo "Detailed log in ${log1}"
        echo ""
        exit 99
    fi

    pc1file=${pc1file}-${transform}

    # Second system
    if [ -s "${pc2file}-${transform}.xml" ]
    then
        echo "Using previously ${transform} transformd extracted ${granularity} from ${language} files in ${system2}"
        result=0
    else
        echo "Applying ${transform} transformation to extracted ${granularity} from ${language} files in ${system2}"
        /usr/bin/time --output=/tmp/time$$ "${lib}/scripts/Transform" "${granularity}" "${language}" "${pc2file}.xml" ${transform} >> "${log2}" 2>&1
        # Record the status before echo and cat overwrite $?.
        result=$?
        echo ""
        /bin/cat /tmp/time$$
    fi

    echo ""

    if [ $result != 0 ]
    then
        echo "*** ERROR: Transformation failed, code $result"
        echo "Detailed log in ${log2}"
        echo ""
        exit 99
    fi

    pc2file=${pc2file}-${transform}
fi

# Apply the configured renaming to both systems, unless the configuration
# sets rename to "none".  A non-empty result file from an earlier run is
# reused instead of being recomputed.
if [ "${rename}" != none ]
then
    log1="${system1}_${granularity}-crossclones-${datestamp}.log"
    log2="${system2}_${granularity}-crossclones-${datestamp}.log"

    # First system
    if [ -s "${pc1file}-${rename}.xml" ]
    then
        echo "Using previously ${rename} renamed extracted ${granularity} from ${language} files in ${system1}"
        result=0
    else
        echo "Applying ${rename} renaming to extracted ${granularity} from ${language} files in ${system1}"
        # ${rename} stays unquoted so that a value holding several words
        # still reaches Rename as separate arguments.
        /usr/bin/time --output=/tmp/time$$ "${lib}/scripts/Rename" "${granularity}" "${language}" "${pc1file}.xml" ${rename} >> "${log1}" 2>&1
        # Record the status before echo and cat overwrite $?.
        result=$?
        echo ""
        /bin/cat /tmp/time$$
    fi

    echo ""

    if [ $result != 0 ]
    then
        echo "*** ERROR: Renaming failed, code $result"
        echo "Detailed log in ${log1}"
        echo ""
        exit 99
    fi

    pc1file=${pc1file}-${rename}

    # Second system
    if [ -s "${pc2file}-${rename}.xml" ]
    then
        echo "Using previously ${rename} renamed extracted ${granularity} from ${language} files in ${system2}"
        result=0
    else
        echo "Applying ${rename} renaming to extracted ${granularity} from ${language} files in ${system2}"
        /usr/bin/time --output=/tmp/time$$ "${lib}/scripts/Rename" "${granularity}" "${language}" "${pc2file}.xml" ${rename} >> "${log2}" 2>&1
        # Record the status before echo and cat overwrite $?.
        result=$?
        echo ""
        /bin/cat /tmp/time$$
    fi

    echo ""

    if [ $result != 0 ]
    then
        echo "*** ERROR: Renaming failed, code $result"
        echo "Detailed log in ${log2}"
        echo ""
        exit 99
    fi

    pc2file=${pc2file}-${rename}
fi

# Apply the configured filtering to both systems, unless the configuration
# sets filter to "none".  A non-empty result file from an earlier run is
# reused instead of being recomputed.
if [ "${filter}" != none ]
then
    log1="${system1}_${granularity}-crossclones-${datestamp}.log"
    log2="${system2}_${granularity}-crossclones-${datestamp}.log"

    # First system
    if [ -s "${pc1file}-filter.xml" ]
    then
        echo "Using previously filtered extracted ${granularity} from ${language} files in ${system1}"
        result=0
    else
        echo "Applying filtering of ${filter} to extracted ${granularity} from ${language} files in ${system1}"
        # ${filter} stays unquoted so that a value holding several words
        # still reaches Filter as separate arguments.
        /usr/bin/time --output=/tmp/time$$ "${lib}/scripts/Filter" "${granularity}" "${language}" "${pc1file}.xml" ${filter} >> "${log1}" 2>&1
        # Record the status before echo and cat overwrite $?.
        result=$?
        echo ""
        /bin/cat /tmp/time$$
    fi

    echo ""

    if [ $result != 0 ]
    then
        echo "*** ERROR: Filtering failed, code $result"
        echo "Detailed log in ${log1}"
        echo ""
        exit 99
    fi

    pc1file=${pc1file}-filter

    # Second system
    if [ -s "${pc2file}-filter.xml" ]
    then
        echo "Using previously filtered extracted ${granularity} from ${language} files in ${system2}"
        result=0
    else
        echo "Applying filtering of ${filter} to extracted ${granularity} from ${language} files in ${system2}"
        /usr/bin/time --output=/tmp/time$$ "${lib}/scripts/Filter" "${granularity}" "${language}" "${pc2file}.xml" ${filter} >> "${log2}" 2>&1
        # Record the status before echo and cat overwrite $?.
        result=$?
        echo ""
        /bin/cat /tmp/time$$
    fi

    echo ""

    if [ $result != 0 ]
    then
        echo "*** ERROR: Filtering failed, code $result"
        echo "Detailed log in ${log2}"
        echo ""
        exit 99
    fi

    pc2file=${pc2file}-filter
fi

# Apply the configured abstraction to both systems, unless the configuration
# sets abstract to "none".  A non-empty result file from an earlier run is
# reused instead of being recomputed.
if [ "${abstract}" != none ]
then
    log1="${system1}_${granularity}-crossclones-${datestamp}.log"
    log2="${system2}_${granularity}-crossclones-${datestamp}.log"

    # First system
    if [ -s "${pc1file}-abstract.xml" ]
    then
        echo "Using previously abstracted extracted ${granularity} from ${language} files in ${system1}"
        result=0
    else
        echo "Applying abstraction of ${abstract} to extracted ${granularity} from ${language} files in ${system1}"
        # ${abstract} stays unquoted so that a value holding several words
        # still reaches Abstract as separate arguments.
        /usr/bin/time --output=/tmp/time$$ "${lib}/scripts/Abstract" "${granularity}" "${language}" "${pc1file}.xml" ${abstract} >> "${log1}" 2>&1
        # Record the status before echo and cat overwrite $?.
        result=$?
        echo ""
        /bin/cat /tmp/time$$
    fi

    echo ""

    if [ $result != 0 ]
    then
        echo "*** ERROR: Abstraction failed, code $result"
        echo "Detailed log in ${log1}"
        echo ""
        exit 99
    fi

    pc1file=${pc1file}-abstract

    # Second system
    if [ -s "${pc2file}-abstract.xml" ]
    then
        echo "Using previously abstracted extracted ${granularity} from ${language} files in ${system2}"
        result=0
    else
        echo "Applying abstraction of ${abstract} to extracted ${granularity} from ${language} files in ${system2}"
        /usr/bin/time --output=/tmp/time$$ "${lib}/scripts/Abstract" "${granularity}" "${language}" "${pc2file}.xml" ${abstract} >> "${log2}" 2>&1
        # Record the status before echo and cat overwrite $?.
        result=$?
        echo ""
        /bin/cat /tmp/time$$
    fi

    echo ""

    if [ $result != 0 ]
    then
        echo "*** ERROR: Abstraction failed, code $result"
        echo "Detailed log in ${log2}"
        echo ""
        exit 99
    fi

    pc2file=${pc2file}-abstract
fi

# Apply the configured custom normalization to both systems, unless the
# configuration sets normalize to "none".  A non-empty result file from an
# earlier run is reused instead of being recomputed.
# NOTE(review): the reuse tests look for "<base>-normalize.xml" while the
# base name is afterwards extended with "-normalized" - confirm which name
# the Normalize script really writes; one of the two looks inconsistent.
if [ "${normalize}" != none ]
then
    log1="${system1}_${granularity}-crossclones-${datestamp}.log"
    log2="${system2}_${granularity}-crossclones-${datestamp}.log"

    # First system
    if [ -s "${pc1file}-normalize.xml" ]
    then
        echo "Using previously normalized extracted ${granularity} from ${language} files in ${system1}"
        result=0
    else
        echo "Applying custom normalization ${normalize} to extracted ${granularity} from ${language} files in ${system1}"
        # ${normalize} stays unquoted so that a value holding several words
        # still reaches Normalize as separate arguments.
        /usr/bin/time --output=/tmp/time$$ "${lib}/scripts/Normalize" "${granularity}" "${language}" "${pc1file}.xml" ${normalize} >> "${log1}" 2>&1
        # Record the status before echo and cat overwrite $?.
        result=$?
        echo ""
        /bin/cat /tmp/time$$
    fi

    echo ""

    if [ $result != 0 ]
    then
        echo "*** ERROR: Custom normalization failed, code $result"
        echo "Detailed log in ${log1}"
        echo ""
        exit 99
    fi

    pc1file=${pc1file}-normalized

    # Second system
    if [ -s "${pc2file}-normalize.xml" ]
    then
        echo "Using previously normalized extracted ${granularity} from ${language} files in ${system2}"
        result=0
    else
        echo "Applying custom normalization ${normalize} to extracted ${granularity} from ${language} files in ${system2}"
        /usr/bin/time --output=/tmp/time$$ "${lib}/scripts/Normalize" "${granularity}" "${language}" "${pc2file}.xml" ${normalize} >> "${log2}" 2>&1
        # Record the status before echo and cat overwrite $?.
        result=$?
        echo ""
        /bin/cat /tmp/time$$
    fi

    echo ""

    if [ $result != 0 ]
    then
        echo "*** ERROR: Custom normalization failed, code $result"
        echo "Detailed log in ${log2}"
        echo ""
        exit 99
    fi

    pc2file=${pc2file}-normalized
fi

# Find near-miss clones between the two prepared potential-clone files.
# Output is appended to the first system's log for this run.
log1="${system1}_${granularity}-crossclones-${datestamp}.log"

echo "Finding cross-crossclones between ${minsize} and ${maxsize} lines at UPI threshold ${threshold}"
# threshold, minsize and maxsize stay unquoted so that an unset value is
# dropped from the argument list rather than passed as an empty argument.
/usr/bin/time --output=/tmp/time$$ "${lib}/scripts/FindCrossClones" "${pc1file}.xml" "${pc2file}.xml" ${threshold} ${minsize} ${maxsize} >> "${log1}" 2>&1
# Record the status immediately: after the echo and cat below, $? would
# describe those commands instead of the clone analysis.
result=$?
echo ""
/bin/cat /tmp/time$$

echo ""

if [ $result != 0 ]
then
    echo "*** ERROR: Cross-clone analysis failed, code $result"
    echo "Detailed log in ${log1}"
    echo ""
    exit 99
fi

# Show the last "Found ..." summary line written to the log.
grep "^Found " "${log1}" | tail -1
echo ""

# Cluster the clone pairs into clone classes when the configuration sets
# cluster to "yes".
if [ "${cluster}" == "yes" ]
then
    log1="${system1}_${granularity}-crossclones-${datestamp}.log"

    # Compute clone classes
    echo "Clustering clone pairs into classes"
    # Only the directory prefix is quoted; the pattern after it must stay
    # unquoted so the shell expands it to the result files.
    /usr/bin/time --output=/tmp/time$$ "${lib}/scripts/ClusterPairs" "${pc1file}-crossclones/"*-crossclones-0.[0-9].xml >> "${log1}" 2>&1
    # Record the status before echo and cat overwrite $?.
    result=$?
    echo ""
    /bin/cat /tmp/time$$

    echo ""

    if [ $result != 0 ]
    then
        echo "*** ERROR: Clustering failed, code $result"
        echo "Detailed log in ${log1}"
        echo ""
        exit 99
    fi

    # Show the last "Clustered ..." summary line written to the log.
    grep "^Clustered " "${log1}" | tail -1
    echo ""
fi

# Produce the reports selected by ${report}:
#   yes        - fetch the original sources of all clones, then make HTML
#   normalized - fetch the normalized sources of all clones, then make HTML
# Any other value produces no report.  When ${cluster} is "yes" each step is
# repeated for the clone-class files.
crosslog="${system1}_${granularity}-crossclones-${datestamp}.log"
clonedir="${pc1file}-crossclones"

# Run one timed reporting command, appending its output to the run log.
#   $1    - step name used in the failure message
#   $2... - the command and its arguments
# Prints the timing summary; on a non-zero status prints an error and exits
# the script with code 99.
_report_step() {
    local step=$1
    shift
    /usr/bin/time --output=/tmp/time$$ "$@" >> "${crosslog}" 2>&1
    # Record the status immediately: after the echo and cat below, $? would
    # describe those commands instead of the reporting command.
    result=$?
    echo ""
    /bin/cat /tmp/time$$
    echo ""
    if [ $result != 0 ]
    then
        echo "*** ERROR: ${step} failed, code $result"
        echo "Detailed log in ${crosslog}"
        echo ""
        exit 99
    fi
}

# In the calls below only the directory prefix is quoted; the pattern after
# it must stay unquoted so the shell expands it to the result files.
if [ "${report}" == "yes" ]
then
    # Get original sources
    echo "Getting original sources for all clones"
    _report_step "Get sources" "${lib}/scripts/GetSource" "${clonedir}/"*-crossclones-0.[0-9].xml

    if [ "${cluster}" == "yes" ]
    then
        _report_step "Get sources" "${lib}/scripts/GetSource" "${clonedir}/"*-crossclones-0.[0-9]-classes.xml
    fi

    # Convert to HTML
    echo "Making HTML reports"
    _report_step "Make HTML" "${lib}/scripts/MakePairHTML" "${clonedir}/"*-crossclones-0.[0-9]-withsource.xml

    if [ "${cluster}" == "yes" ]
    then
        _report_step "Make HTML" "${lib}/scripts/MakePairHTML" "${clonedir}/"*-crossclones-0.[0-9]-classes-withsource.xml
    fi

elif [ "${report}" == "normalized" ]
then
    # Get normalized sources.  This branch works on the same cross-clone
    # result directory and log file as the "yes" branch above.
    echo "Getting normalized sources for all clones"
    _report_step "Get normalized sources" "${lib}/scripts/GetNormSource" "${clonedir}/"*-crossclones-0.[0-9].xml

    if [ "${cluster}" == "yes" ]
    then
        _report_step "Get normalized sources" "${lib}/scripts/GetNormSource" "${clonedir}/"*-crossclones-0.[0-9]-classes.xml
    fi

    # Convert to HTML
    echo "Making HTML reports"
    _report_step "Make HTML" "${lib}/scripts/MakePairHTML" "${clonedir}/"*-crossclones-0.[0-9]-normsource.xml

    if [ "${cluster}" == "yes" ]
    then
        _report_step "Make HTML" "${lib}/scripts/MakePairHTML" "${clonedir}/"*-crossclones-0.[0-9]-classes-normsource.xml
    fi
fi

# Remove the timing scratch file, then tell the user where the log and the
# results are, and print the finishing time.
/bin/rm -f /tmp/time$$

printf '%s\n' \
    "Done." \
    "" \
    "Detailed log in ${system1}_${granularity}-crossclones-${datestamp}.log" \
    "Results in ${pc1file}-crossclones" \
    ""
date
echo ""
